home *** CD-ROM | disk | FTP | other *** search
-
- #include <stdio.h>
- #include <string.h>
- #include "parsehtm.h"
-
- static FILE *htmlFile = 0;
-
- /* Global Dictionaries */
- Dictionary endTags;
- Dictionary handlerDict;
-
/*
 * Tag Handlers are functions that return void,
 * and take four arguments:
 * 1) the tag string, which they should alter to
 *    the parsed text.
 * 2) an arg string, not containing any registered endTags
 * 3) any registered endTag
 * 4) a dictionary of data for the tag, from dictForTag()
 *    The keys for tagDict are const char *, and the values are String
 *
 * The prototype for a handler is:
 * void handler(String ts,String as,String et,Dictionary td);
 */
-
- /* This function allocates space for the global dictionaries
- * it must be called before any handlers or end tags are registered. */
- void initializeHtmlParsingLibrary()
- {
- endTags = dict_alloc();
- handlerDict = dict_alloc();
- }
-
- /*
- * parseHtml() provides the primary interface
- * to this library. It takes the file name
- * and returns a String containing the parsed html.
- */
-
- String parseHtml(const char *fileName)
- {
- String retVal;
-
- if(fileName && *fileName)
- {
- htmlFile = fopen(fileName,"r");
- }
-
- if(htmlFile)
- {
- retVal = mainHtmlParser((const char *) 0);
- fclose(htmlFile);
- }
-
- return retVal;
- }
-
- /*
- * mainHtmlParser is at the heart of the parsing code
- * This function loops over the file, stopping at the end
- * of the file, a stop character or a stop string.
- * It seperates the file into tags and plain text,
- * then calls the functions handleTag and handlePlainText
- * to parse the data.
- */
- String mainHtmlParser(const char *stopStr)
- {
- char c,stopChar;
- String tmpBuffer = 0;
- String mainBuffer = 0;
- int inTag = 0;
-
- /* Initialize the buffers. */
- tmpBuffer = string_alloc(64);
- mainBuffer = string_alloc(4096);
-
- /* If we got a stop string, see if
- * it is really a character. This allows
- * the function to have dual functionality off
- * of only one argument.
- */
- if(stopStr)
- {
- if(*stopStr && (strlen(stopStr) == 1))
- {
- stopChar = stopStr[0];
- }
- else
- {
- stopChar = '\0';
- }
- }
-
- /* Loop to the end of the file. */
- while(!feof(htmlFile))
- {
- /* Get a character at each step of the loop */
- c = fgetc(htmlFile);
-
- /* See what character this is*/
-
- if(c == '>')/* end of a tag */
- {
- if(!inTag)/*we are not in a tag, this is an error*/
- {
- exit(1);
- }
-
- /* Close the tag */
- inTag = 0;
-
- string_appendChar(tmpBuffer,c);
-
- /* See if this tag is the stop string */
-
- if(stopStr && !strcmp(tmpBuffer->string,stopStr))
- {
- return mainBuffer;
- }
- else
- {
- /* Handle the tag we just found */
- handleTag(tmpBuffer);
-
- /* Add the parsed text to the main buffer */
- string_appendString(mainBuffer,tmpBuffer->string);
-
- /* Reset the tmp buffer */
- string_empty(tmpBuffer);
- }
- }
- else if(c == '<')/* Start of a tag */
- {
- if(inTag)/*we are already in a tag, this is an error*/
- {
- exit(1);
- }
-
- inTag = 1;
-
- /* If we have some data, it is plain text,
- * add it to the main buffer.
- */
- if(tmpBuffer->string[0])
- {
- handlePlainText(tmpBuffer);
-
- /* Add the parsed text to the main buffer */
- string_appendString(mainBuffer,tmpBuffer->string);
-
- /* Reset the tmp buffer */
- string_empty(tmpBuffer);
-
- }
-
- /* Reset the tmp buffer */
- string_setStringValue(tmpBuffer,"<");
- }
- else if(c == stopChar)/* Found the stop char */
- {
- string_appendChar(tmpBuffer,c);
-
- /* Add the parsed text to the main buffer */
- string_appendString(mainBuffer,tmpBuffer->string);
-
- break;
- }
- else/* normal character */
- {
- string_appendChar(tmpBuffer,c);
- }
- }
-
- /* free the tmp buffer */
- string_free(tmpBuffer);
-
- /* Return the parsed text */
- return mainBuffer;
- }
-
- /*
- * Handle tag is called by mainHtmlParser.
- * This function looks for a tag handler and calls
- * it with the appropriate arguments.
- */
- void handleTag(String tagString)
- {
- Dictionary tagDict = 0;
- String argString = 0;
- String endTag = 0;
- void (*handler)();
- String tag;
-
- /* Create a dictionary for the tag
- * This will make it easier to access
- * information about the tag
- */
- tagDict = dictForTag(tagString);
-
- /* The tag dictionary should have a TAG value */
- if(tag = (String)dict_valueForKey(tagDict,"TAG"))
- {
- if(tag->string)
- {
- /* See if there is an endtag or a handler registered*/
- endTag = dict_valueForKey(endTags, tag->string);
-
- handler = dict_valueForKey(handlerDict, tag->string);
- }
- }
-
- /* If there is a handler, use it */
- if(handler)
- {
- /* If there is an end tag, parse up to it
- * be sure to remove the end tag from the body text.
- */
- if(endTag && endTag->string)
- {
- char * finalPointy = 0;
-
- argString = mainHtmlParser(endTag->string);
-
- /* Get rid of the end tag */
-
- if(endTag->string[0] == '<')/* this is a real tag */
- {
- finalPointy = strrchr(argString->string,'<');
-
- /* End the string at the final <, thus removing the end tag */
- if(finalPointy) *finalPointy = '\0';
- }
- }
-
- /* Call the handler */
- handler(tagString,argString,endTag,tagDict);
- }
- else /* No handler, treat this as plain text */
- {
- handlePlainText(tagString);
- }
-
- /* Clean up */
- dict_freeWithData(tagDict,string_free);
- string_free(argString);
- }
-
- void handlePlainText(String plainText)
- {
- void (*handler)();
-
- /* See if there is a default handler */
- handler = dict_valueForKey(handlerDict,"DEFAULT");
-
- /* If there is a default handler, use it*/
- if(handler)
- {
- handler(plainText,0,0,0);
- }
- }
-
- /*
- * Convienence function that searches a c string for a tag
- * it returns a pointer into the string, after the tag.
- * This return value is used for followup calls.
- */
-
- char * nextTag(char *start, String tagString)
- {
- int inQuote = 0;
-
- if(start && *start)
- {
- /* Move past any starting delimiters */
-
- while((*start == ' ')||(*start == '<'))
- {
- start++;
- }
-
- string_empty(tagString);
-
- while(*start && (((*start != ' ')&&(*start != '>'))||inQuote))
- {
- if(*start == '"')/*Look for quotes, dont include them*/
- {
- if(inQuote) inQuote = 0;
- else inQuote=1;
- }
- else
- {
- string_appendChar(tagString,*start);
- }
- start++;
- }
-
- start++;/*Move to next character for subsequent next search*/
- }
-
- return start;
- }
-
- /*
- * dictForTag is a convience function
- * it takes a tag string, and returns
- * a dictionary of key value pairs.
- * One key should be TAG. All of the keys
- * are capitalized.
- */
- Dictionary dictForTag(String tagString)
- {
- Dictionary tagDict = 0;
- String tag;
- String tmpString;
- char *slide = (char *)0;
- char *key,*value;
- String dataToAdd;
-
- /* Make the dictionary */
- tagDict = dict_alloc();
-
- /* If there is no tag string, return the empty dict */
- if(!tagString || !(tagString->string)) return tagDict;
-
- /* Copy the tag string, since strtok will alter data */
- tag = string_alloc(strlen(tagString->string) + 1);
- string_setStringValue(tag, tagString->string);
-
- /* Make a temporary string, to help with our work */
- tmpString = string_alloc(strlen(tagString->string) + 1);
-
- /*Get the first token*/
- slide = nextTag(tag->string,tmpString);
-
- /* If the current token starts with a <, get rid of the < */
- if(tmpString->string && (tmpString->string[0] == '<'))
- {
- string_crop(tmpString,1);
- }
-
- /* Capitalize the tag string, and add it to the dict */
- if(tmpString->string)
- {
- string_toUpper(tmpString);
-
- dataToAdd = string_alloc(strlen(tmpString->string) +1);
- string_setStringValue(dataToAdd, tmpString->string);
-
- dict_setValueForKey(tagDict,"TAG", dataToAdd);
- }
-
- /* Loop over the tag string, breaking it into tokens */
- do
- {
-
- /*Get the next token*/
- slide = nextTag(slide,tmpString);
-
- /* If we got a token */
- if(tmpString->string)
- {
- /* See if it is binary or unary */
- if(value = strchr(tmpString->string,'='))
- {
- key = tmpString->string;
-
- *value = '\0'; /* set the = to a null */
- value++; /* Move value past the old = */
-
- string_toUpper(tmpString);/* Will only get the key */
-
- /* Clean up any trailing or leading quotes */
- if(value[strlen(value)-1] == '\"')
- {
- value[strlen(value)-1] = '\0';
- }
-
- if(value[0] == '\"')
- {
- value++;
- }
-
- /* Add the attribute to the tag dict */
- dataToAdd = string_alloc(strlen(value) +1);
- string_setStringValue(dataToAdd,value);
-
- dict_setValueForKey(tagDict, key, dataToAdd);
- }
- else
- {
- if(tmpString->string[strlen(tmpString->string)-1] == '\"')
- {
- string_chop(tmpString,1);
- }
-
- if(tmpString->string[0] == '\"')
- {
- string_crop(tmpString,1);
- }
-
- string_toUpper(tmpString);
-
- /* Add the unary attribute to the tag dict */
-
- dataToAdd = string_alloc(strlen(tmpString->string) +1);
- string_setStringValue(dataToAdd, tmpString->string);
-
- dict_setValueForKey(tagDict, tmpString->string, dataToAdd);
- }
- }
-
- }while (slide && *slide);
-
- string_free(tag);
- string_free(tmpString);
-
- return tagDict;
- }
-
- /*
- * stringForTagDict is provided for programmers
- * writing handler functions.
- * This function takes a tag dictionary
- * and returns the cooresponding tag string.
- * the string is allocated, and is the callers
- * responsibility.
- */
- String stringForTagDict(Dictionary tagDict)
- {
- String tagString;
- DictState iterator;
- const char *key = 0;
- String value = 0;
-
- /* Allocate the string */
- tagString = string_alloc(64);
-
- /* Make sure we have tag data */
- if(tagDict)
- {
- /* Add the tag to the string */
- value = dict_valueForKey(tagDict,"TAG");
-
- if(value && value->string)
- {
- string_appendString(tagString,"<");
- string_appendString(tagString,value->string);
-
- /* Loop over the dictionary, adding tag attributes */
- iterator = dict_initState(tagDict);
-
- while(dict_nextState(&iterator))
- {
- key = (const char *) iterator.curNode->key;
- value = (String) iterator.curNode->value;
-
- if(strcmp(key,"TAG") != 0)/* Ignore the tag */
- {
- if(!strcmp(key,value->string))/* This is a unary key */
- {
- string_appendString(tagString," ");
- string_appendString(tagString,key);
- }
- else /* Binary attribute */
- {
- string_appendString(tagString," ");
- string_appendString(tagString,key);
- string_appendString(tagString,"=\"");
- string_appendString(tagString,value->string);
- string_appendString(tagString,"\"");
- }
- }
- }
-
- /* Close the tag string */
- string_appendString(tagString,">");
- }
- }
- /* Return the string */
- return tagString;
- }
-